#imports
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import folium
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')
# Colour palette (hex codes) used for the case categories in the plots
cnf ='#393e46'  # presumably "confirmed"; not referenced in the visible cells
dth = '#ff2e63'  # deaths
rec = '#21bf73'  # recovered
act = '#fe9801'  # active
# Dataset preparation
import plotly as py
# Initialise plotly's offline notebook mode
py.offline.init_notebook_mode(connected = True)
# Load the cleaned dataset, parsing the 'Date' column as datetimes
df = pd.read_csv('covid_19_data_cleaned.csv',parse_dates=['Date'])
df
| Date | Province/State | Country | Lat | Long | Confirmed | Recovered | Deaths | Active | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
| 1 | 2020-01-23 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
| 2 | 2020-01-24 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
| 3 | 2020-01-25 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
| 4 | 2020-01-26 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 136280 | 2021-05-16 | NaN | Timor-Leste | -8.87420 | 125.727500 | 0 | 2336 | 0 | -2336 |
| 136281 | 2021-05-17 | NaN | Timor-Leste | -8.87420 | 125.727500 | 0 | 2406 | 0 | -2406 |
| 136282 | 2021-05-18 | NaN | Timor-Leste | -8.87420 | 125.727500 | 0 | 2517 | 0 | -2517 |
| 136283 | 2021-05-19 | NaN | Timor-Leste | -8.87420 | 125.727500 | 0 | 2636 | 0 | -2636 |
| 136284 | 2021-05-20 | NaN | Timor-Leste | -8.87420 | 125.727500 | 0 | 2716 | 0 | -2716 |
136285 rows × 9 columns
# Replace missing Province/State values with an empty string
df['Province/State'] = df['Province/State'].fillna("")
df
| Date | Province/State | Country | Lat | Long | Confirmed | Recovered | Deaths | Active | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 1 | 2020-01-23 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 2 | 2020-01-24 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 3 | 2020-01-25 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 4 | 2020-01-26 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 136280 | 2021-05-16 | Timor-Leste | -8.87420 | 125.727500 | 0 | 2336 | 0 | -2336 | |
| 136281 | 2021-05-17 | Timor-Leste | -8.87420 | 125.727500 | 0 | 2406 | 0 | -2406 | |
| 136282 | 2021-05-18 | Timor-Leste | -8.87420 | 125.727500 | 0 | 2517 | 0 | -2517 | |
| 136283 | 2021-05-19 | Timor-Leste | -8.87420 | 125.727500 | 0 | 2636 | 0 | -2636 | |
| 136284 | 2021-05-20 | Timor-Leste | -8.87420 | 125.727500 | 0 | 2716 | 0 | -2716 |
136285 rows × 9 columns
# Load the four CSV inputs; 'Date' is parsed as datetime wherever it is requested.
# NOTE(review): covid_19_data_cleaned.csv was already loaded (and its
# Province/State gaps filled) in the earlier cells, so this repeats that work.
df = pd.read_csv('covid_19_data_cleaned.csv',parse_dates=['Date'])
country_daywise = pd.read_csv('country_daywise.csv',parse_dates=['Date'])
countrywise = pd.read_csv('countrywise.csv')
daywise = pd.read_csv('daywise.csv',parse_dates=['Date'])
# Replace missing Province/State values with an empty string
df['Province/State'] = df['Province/State'].fillna("")
df.head()
| Date | Province/State | Country | Lat | Long | Confirmed | Recovered | Deaths | Active | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 1 | 2020-01-23 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 2 | 2020-01-24 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 3 | 2020-01-25 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | |
| 4 | 2020-01-26 | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 |
# Worldwide totals per date for each case type.
# The column is selected *before* summing so only that column is aggregated;
# summing the whole frame first would, depending on the pandas version, also
# try to aggregate the text columns (Country, Province/State).
confirmed = df.groupby('Date')['Confirmed'].sum().reset_index()
recovered = df.groupby('Date')['Recovered'].sum().reset_index()
deaths = df.groupby('Date')['Deaths'].sum().reset_index()
deaths
| Date | Deaths | |
|---|---|---|
| 0 | 2020-01-22 | 17 |
| 1 | 2020-01-23 | 18 |
| 2 | 2020-01-24 | 26 |
| 3 | 2020-01-25 | 42 |
| 4 | 2020-01-26 | 56 |
| ... | ... | ... |
| 480 | 2021-05-16 | 3379449 |
| 481 | 2021-05-17 | 3390089 |
| 482 | 2021-05-18 | 3404338 |
| 483 | 2021-05-19 | 3417682 |
| 484 | 2021-05-20 | 3430326 |
485 rows × 2 columns
# Count missing values per column
df.isnull().sum()
Date 0 Province/State 0 Country 0 Lat 0 Long 0 Confirmed 0 Recovered 0 Deaths 0 Active 0 dtype: int64
# Print the frame summary (column dtypes, non-null counts, memory usage)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 136285 entries, 0 to 136284 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 136285 non-null datetime64[ns] 1 Province/State 136285 non-null object 2 Country 136285 non-null object 3 Lat 136285 non-null float64 4 Long 136285 non-null float64 5 Confirmed 136285 non-null int64 6 Recovered 136285 non-null int64 7 Deaths 136285 non-null int64 8 Active 136285 non-null int64 dtypes: datetime64[ns](1), float64(2), int64(4), object(2) memory usage: 9.4+ MB
# Select the rows whose Country is "India"
df.query('Country == "India"')
| Date | Province/State | Country | Lat | Long | Confirmed | Recovered | Deaths | Active | |
|---|---|---|---|---|---|---|---|---|---|
| 71295 | 2020-01-22 | India | 20.593684 | 78.96288 | 0 | 0 | 0 | 0 | |
| 71296 | 2020-01-23 | India | 20.593684 | 78.96288 | 0 | 0 | 0 | 0 | |
| 71297 | 2020-01-24 | India | 20.593684 | 78.96288 | 0 | 0 | 0 | 0 | |
| 71298 | 2020-01-25 | India | 20.593684 | 78.96288 | 0 | 0 | 0 | 0 | |
| 71299 | 2020-01-26 | India | 20.593684 | 78.96288 | 0 | 0 | 0 | 0 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 71775 | 2021-05-16 | India | 20.593684 | 78.96288 | 24965463 | 21174076 | 274390 | 3516997 | |
| 71776 | 2021-05-17 | India | 20.593684 | 78.96288 | 25228996 | 21596512 | 278719 | 3353765 | |
| 71777 | 2021-05-18 | India | 20.593684 | 78.96288 | 25496330 | 21986363 | 283248 | 3226719 | |
| 71778 | 2021-05-19 | India | 20.593684 | 78.96288 | 25772440 | 22355440 | 287122 | 3129878 | |
| 71779 | 2021-05-20 | India | 20.593684 | 78.96288 | 26031991 | 22712735 | 291331 | 3027925 |
485 rows × 9 columns
# Worldwide total confirmed, recovered and death counts:
# last rows of the per-date confirmed totals
confirmed.tail()
| Date | Confirmed | |
|---|---|---|
| 480 | 2021-05-16 | 163069932 |
| 481 | 2021-05-17 | 163609626 |
| 482 | 2021-05-18 | 164231810 |
| 483 | 2021-05-19 | 164902902 |
| 484 | 2021-05-20 | 165531431 |
# Last rows of the per-date recovered totals
recovered.tail()
| Date | Recovered | |
|---|---|---|
| 480 | 2021-05-16 | 99064469 |
| 481 | 2021-05-17 | 99808931 |
| 482 | 2021-05-18 | 100552044 |
| 483 | 2021-05-19 | 101250071 |
| 484 | 2021-05-20 | 101821428 |
# Last rows of the per-date death totals
deaths.tail()
| Date | Deaths | |
|---|---|---|
| 480 | 2021-05-16 | 3379449 |
| 481 | 2021-05-17 | 3390089 |
| 482 | 2021-05-18 | 3404338 |
| 483 | 2021-05-19 | 3417682 |
| 484 | 2021-05-20 | 3430326 |
# Line-and-marker chart of the worldwide confirmed, recovered and death totals
fig = go.Figure(
    data=[
        go.Scatter(
            x=confirmed['Date'],
            y=confirmed['Confirmed'],
            mode='lines+markers',
            name='Confirmed',
            line={'color': "Black"},
        ),
        go.Scatter(
            x=recovered['Date'],
            y=recovered['Recovered'],
            mode='lines+markers',
            name='Recovered',
            line={'color': "green"},
        ),
        go.Scatter(
            x=deaths['Date'],
            y=deaths['Deaths'],
            mode='lines+markers',
            name='Deaths',
            line={'color': "Red"},
        ),
    ]
)
fig.update_layout(
    title='worldwide Covid-19',
    xaxis_tickfont_size=14,
    yaxis={'title': 'Number of Cases'},
)
fig.show()
# Case density animation on a world map
# NOTE(review): `df.info` is referenced without parentheses, so the cell only
# displays the bound-method repr; call `df.info()` to get the dtype summary.
df.info
<bound method DataFrame.info of Date Province/State Country Lat Long \
0 2020-01-22 Afghanistan 33.93911 67.709953
1 2020-01-23 Afghanistan 33.93911 67.709953
2 2020-01-24 Afghanistan 33.93911 67.709953
3 2020-01-25 Afghanistan 33.93911 67.709953
4 2020-01-26 Afghanistan 33.93911 67.709953
... ... ... ... ... ...
136280 2021-05-16 Timor-Leste -8.87420 125.727500
136281 2021-05-17 Timor-Leste -8.87420 125.727500
136282 2021-05-18 Timor-Leste -8.87420 125.727500
136283 2021-05-19 Timor-Leste -8.87420 125.727500
136284 2021-05-20 Timor-Leste -8.87420 125.727500
Confirmed Recovered Deaths Active
0 0 0 0 0
1 0 0 0 0
2 0 0 0 0
3 0 0 0 0
4 0 0 0 0
... ... ... ... ...
136280 0 2336 0 -2336
136281 0 2406 0 -2406
136282 0 2517 0 -2517
136283 0 2636 0 -2636
136284 0 2716 0 -2716
[136285 rows x 9 columns]>
# Convert the Date column to strings; it is used as the animation frame of
# the density map below (presumably so each frame gets a plain date label).
df['Date'] = df['Date'].astype(str)
# NOTE(review): `df.info` is not called here either; use `df.info()` to
# confirm the new dtype of 'Date'.
df.info
<bound method DataFrame.info of Date Province/State Country Lat Long \
0 2020-01-22 Afghanistan 33.93911 67.709953
1 2020-01-23 Afghanistan 33.93911 67.709953
2 2020-01-24 Afghanistan 33.93911 67.709953
3 2020-01-25 Afghanistan 33.93911 67.709953
4 2020-01-26 Afghanistan 33.93911 67.709953
... ... ... ... ... ...
136280 2021-05-16 Timor-Leste -8.87420 125.727500
136281 2021-05-17 Timor-Leste -8.87420 125.727500
136282 2021-05-18 Timor-Leste -8.87420 125.727500
136283 2021-05-19 Timor-Leste -8.87420 125.727500
136284 2021-05-20 Timor-Leste -8.87420 125.727500
Confirmed Recovered Deaths Active
0 0 0 0 0
1 0 0 0 0
2 0 0 0 0
3 0 0 0 0
4 0 0 0 0
... ... ... ... ...
136280 0 2336 0 -2336
136281 0 2406 0 -2406
136282 0 2517 0 -2517
136283 0 2636 0 -2636
136284 0 2716 0 -2716
[136285 rows x 9 columns]>
# Animated density map of the reported locations, one animation frame per date.
# NOTE(review): no `z` column is passed, so the case counts only appear in the
# hover data; confirm the density is meant to be unweighted.
fig = px.density_mapbox(
    df,
    lat='Lat',
    lon='Long',
    hover_name='Country',
    hover_data=['Confirmed', 'Recovered', 'Deaths'],
    animation_frame='Date',
    color_continuous_scale='Portland',
    radius=7,
    zoom=0,
    height=700,
)
fig.update_layout(
    title='worldwide Covid-19 Cases with Time laps',
    mapbox_style='open-street-map',
    mapbox_center_lon=0,
)
fig.show()
# Cases over time: treemap of the latest date's Active / Deaths / Recovered totals.
# Columns are selected with a list: selecting several groupby columns with a
# bare tuple (['a', 'b'] without the inner brackets) is no longer accepted by
# current pandas releases.
temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
# Keep only the row for the most recent date
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)
# Long format: one row per case type, in columns 'variable' / 'value'
tm = temp.melt(id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
# NOTE(review): the colours are passed positionally as [act, rec, dth] while
# value_vars is ['Active', 'Deaths', 'Recovered']; confirm each sector ends up
# with the intended colour.
fig = px.treemap(
    tm,
    path=['variable'],
    values='value',
    height=250,
    width=800,
    color_discrete_sequence=[act, rec, dth],
)
# Show the label, text and value inside each sector
fig.data[0].textinfo = 'label+text+value'
fig.show()
# Area chart of the Recovered / Deaths / Active totals per date.
# Columns are selected with a list: selecting several groupby columns with a
# bare tuple is no longer accepted by current pandas releases.
temp = df.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# Long format: one row per (Date, Case) pair with its Count
temp = temp.melt(
    id_vars='Date',
    value_vars=['Recovered', 'Deaths', 'Active'],
    var_name='Case',
    value_name='Count',
)
fig = px.area(
    temp,
    x='Date',
    y='Count',
    color='Case',
    height=600,
    title='Cases over time',
    color_discrete_sequence=[rec, dth, act],
)
# Add a range slider under the x axis
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()